import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from datetime import datetime
# Palette used as the default trace colorway of the "dc" template
dc_colors = [
    "#2B3A64", "#96aae3", "#C3681D", "#EFBD95",
    "#E73F74", "#80BA5A", "#E68310", "#008695",
    "#CF1C90", "#f97b72", "#4b4b8f", "#A5AA99",
]

# Register a reusable template under the name "dc"; the figures below opt in
# by passing template="dc".
pio.templates["dc"] = go.layout.Template(
    layout={
        "font": {"family": "Poppins, Sans-serif", "color": "#505050"},
        # Title is anchored towards the top-left corner of the figure
        "title": {
            "font": {"family": "Poppins, Sans-serif", "color": "black"},
            "yanchor": "top",
            "y": 0.92,
            "xanchor": "left",
            "x": 0.025,
        },
        "plot_bgcolor": "white",
        "paper_bgcolor": "white",
        "hoverlabel": {"bgcolor": "white"},
        "margin": {"l": 100, "r": 50, "t": 75, "b": 70},
        "colorway": dc_colors,
        # No vertical gridlines; thin light-grey horizontal gridlines
        "xaxis": {"showgrid": False},
        "yaxis": {
            "showgrid": True,
            "gridwidth": 0.1,
            "gridcolor": "lightgrey",
            "showline": True,
            "nticks": 10,
            "linewidth": 1,
            "linecolor": "black",
            # Extend the y range down to zero by default
            "rangemode": "tozero",
        },
    }
)
Loading and Inspecting the Data¶
The first thing we will do is use the yfinance package to download market data from the Yahoo! Finance API. This package is prone to breaking, so we first upgrade it to the latest version.
We will define the date range that we want to use, as well as the ticker we want to download.
!pip install yfinance --upgrade
Collecting yfinance
Obtaining dependency information for yfinance from https://files.pythonhosted.org/packages/09/05/28664524fcc67c078313d482bf25fe403e9399130622cfc89e185ec0abf6/yfinance-0.2.54-py2.py3-none-any.whl.metadata
Downloading yfinance-0.2.54-py2.py3-none-any.whl.metadata (5.8 kB)
Requirement already satisfied: pandas>=1.3.0 in c:\anaconda3\lib\site-packages (from yfinance) (2.0.3)
Requirement already satisfied: numpy>=1.16.5 in c:\users\vatsal vinay parikh\appdata\roaming\python\python311\site-packages (from yfinance) (1.26.4)
Requirement already satisfied: requests>=2.31 in c:\anaconda3\lib\site-packages (from yfinance) (2.31.0)
Collecting multitasking>=0.0.7 (from yfinance)
Obtaining dependency information for multitasking>=0.0.7 from https://files.pythonhosted.org/packages/3e/8a/bb3160e76e844db9e69a413f055818969c8acade64e1a9ac5ce9dfdcf6c1/multitasking-0.0.11-py3-none-any.whl.metadata
Downloading multitasking-0.0.11-py3-none-any.whl.metadata (5.5 kB)
Requirement already satisfied: platformdirs>=2.0.0 in c:\anaconda3\lib\site-packages (from yfinance) (3.10.0)
Requirement already satisfied: pytz>=2022.5 in c:\anaconda3\lib\site-packages (from yfinance) (2023.3.post1)
Collecting frozendict>=2.3.4 (from yfinance)
Obtaining dependency information for frozendict>=2.3.4 from https://files.pythonhosted.org/packages/04/13/d9839089b900fa7b479cce495d62110cddc4bd5630a04d8469916c0e79c5/frozendict-2.4.6-py311-none-any.whl.metadata
Downloading frozendict-2.4.6-py311-none-any.whl.metadata (23 kB)
Collecting peewee>=3.16.2 (from yfinance)
Downloading peewee-3.17.9.tar.gz (3.0 MB)
---------------------------------------- 0.0/3.0 MB ? eta -:--:--
---------------------------------------- 0.0/3.0 MB ? eta -:--:--
--------------------------------------- 0.0/3.0 MB 495.5 kB/s eta 0:00:07
- -------------------------------------- 0.1/3.0 MB 751.6 kB/s eta 0:00:04
-- ------------------------------------- 0.2/3.0 MB 919.0 kB/s eta 0:00:04
-- ------------------------------------- 0.2/3.0 MB 1.1 MB/s eta 0:00:03
--- ------------------------------------ 0.3/3.0 MB 1.1 MB/s eta 0:00:03
---- ----------------------------------- 0.4/3.0 MB 1.2 MB/s eta 0:00:03
----- ---------------------------------- 0.4/3.0 MB 1.2 MB/s eta 0:00:03
------ --------------------------------- 0.5/3.0 MB 1.3 MB/s eta 0:00:02
------- -------------------------------- 0.6/3.0 MB 1.3 MB/s eta 0:00:02
-------- ------------------------------- 0.7/3.0 MB 1.4 MB/s eta 0:00:02
---------- ----------------------------- 0.8/3.0 MB 1.4 MB/s eta 0:00:02
----------- ---------------------------- 0.8/3.0 MB 1.4 MB/s eta 0:00:02
------------ --------------------------- 0.9/3.0 MB 1.5 MB/s eta 0:00:02
------------- -------------------------- 1.0/3.0 MB 1.5 MB/s eta 0:00:02
--------------- ------------------------ 1.1/3.0 MB 1.6 MB/s eta 0:00:02
---------------- ----------------------- 1.2/3.0 MB 1.6 MB/s eta 0:00:02
----------------- ---------------------- 1.3/3.0 MB 1.6 MB/s eta 0:00:02
------------------- -------------------- 1.5/3.0 MB 1.7 MB/s eta 0:00:01
-------------------- ------------------- 1.6/3.0 MB 1.7 MB/s eta 0:00:01
---------------------- ----------------- 1.7/3.0 MB 1.8 MB/s eta 0:00:01
----------------------- ---------------- 1.8/3.0 MB 1.8 MB/s eta 0:00:01
------------------------- -------------- 1.9/3.0 MB 1.8 MB/s eta 0:00:01
-------------------------- ------------- 2.0/3.0 MB 1.8 MB/s eta 0:00:01
--------------------------- ------------ 2.1/3.0 MB 1.8 MB/s eta 0:00:01
----------------------------- ---------- 2.2/3.0 MB 1.9 MB/s eta 0:00:01
------------------------------ --------- 2.3/3.0 MB 1.9 MB/s eta 0:00:01
-------------------------------- ------- 2.5/3.0 MB 1.9 MB/s eta 0:00:01
---------------------------------- ----- 2.6/3.0 MB 2.0 MB/s eta 0:00:01
------------------------------------ --- 2.8/3.0 MB 2.0 MB/s eta 0:00:01
-------------------------------------- - 2.9/3.0 MB 2.0 MB/s eta 0:00:01
--------------------------------------- 3.0/3.0 MB 2.1 MB/s eta 0:00:01
---------------------------------------- 3.0/3.0 MB 2.0 MB/s eta 0:00:00
Installing build dependencies: started
Installing build dependencies: finished with status 'done'
Getting requirements to build wheel: started
Getting requirements to build wheel: finished with status 'done'
Preparing metadata (pyproject.toml): started
Preparing metadata (pyproject.toml): finished with status 'done'
Requirement already satisfied: beautifulsoup4>=4.11.1 in c:\anaconda3\lib\site-packages (from yfinance) (4.12.2)
Requirement already satisfied: soupsieve>1.2 in c:\anaconda3\lib\site-packages (from beautifulsoup4>=4.11.1->yfinance) (2.4)
Requirement already satisfied: python-dateutil>=2.8.2 in c:\anaconda3\lib\site-packages (from pandas>=1.3.0->yfinance) (2.8.2)
Requirement already satisfied: tzdata>=2022.1 in c:\anaconda3\lib\site-packages (from pandas>=1.3.0->yfinance) (2023.3)
Requirement already satisfied: charset-normalizer<4,>=2 in c:\anaconda3\lib\site-packages (from requests>=2.31->yfinance) (2.0.4)
Requirement already satisfied: idna<4,>=2.5 in c:\anaconda3\lib\site-packages (from requests>=2.31->yfinance) (3.4)
Requirement already satisfied: urllib3<3,>=1.21.1 in c:\anaconda3\lib\site-packages (from requests>=2.31->yfinance) (1.26.16)
Requirement already satisfied: certifi>=2017.4.17 in c:\anaconda3\lib\site-packages (from requests>=2.31->yfinance) (2024.8.30)
Requirement already satisfied: six>=1.5 in c:\anaconda3\lib\site-packages (from python-dateutil>=2.8.2->pandas>=1.3.0->yfinance) (1.16.0)
Downloading yfinance-0.2.54-py2.py3-none-any.whl (108 kB)
---------------------------------------- 0.0/108.7 kB ? eta -:--:--
---------------------------------------- 108.7/108.7 kB 2.1 MB/s eta 0:00:00
Downloading frozendict-2.4.6-py311-none-any.whl (16 kB)
Downloading multitasking-0.0.11-py3-none-any.whl (8.5 kB)
Building wheels for collected packages: peewee
Building wheel for peewee (pyproject.toml): started
Building wheel for peewee (pyproject.toml): finished with status 'done'
Created wheel for peewee: filename=peewee-3.17.9-py3-none-any.whl size=139095 sha256=e40c4484df69f4af6bc5fc21616ba68b917a949f3fbd58e55828fbb0df9a0c5e
Stored in directory: c:\users\vatsal vinay parikh\appdata\local\pip\cache\wheels\f4\14\e4\50c88c865833085aeb91e2bd40e3a683ff434806386b8ee7bc
Successfully built peewee
Installing collected packages: peewee, multitasking, frozendict, yfinance
Successfully installed frozendict-2.4.6 multitasking-0.0.11 peewee-3.17.9 yfinance-0.2.54
# Import yfinance
import yfinance as yf

# Date range to download. In the preview below the last row is the day before
# `stop`, so the end date appears to be exclusive.
start = "2020-01-01"
stop = "2023-02-01"

# Ticker to download (GameStop)
ticker = "GME"

# Download daily prices for the ticker.
# - auto_adjust is pinned explicitly because its default changed between
#   yfinance releases (see the warning the library prints when it is omitted);
#   True matches the adjusted prices shown in this notebook.
# - multi_level_index=False keeps flat column names ("Close", "High", ...).
gme = yf.download(
    ticker,
    start=start,
    end=stop,
    auto_adjust=True,
    multi_level_index=False,
)

# Preview DataFrame
gme
YF.download() has changed argument auto_adjust default to True
[*********************100%***********************] 1 of 1 completed
| Close | High | Low | Open | Volume | |
|---|---|---|---|---|---|
| Date | |||||
| 2020-01-02 | 1.577500 | 1.617500 | 1.517500 | 1.535000 | 17814400 |
| 2020-01-03 | 1.470000 | 1.562500 | 1.460000 | 1.552500 | 14175600 |
| 2020-01-06 | 1.462500 | 1.477500 | 1.400000 | 1.450000 | 13579200 |
| 2020-01-07 | 1.380000 | 1.457500 | 1.360000 | 1.442500 | 20912000 |
| 2020-01-08 | 1.430000 | 1.462500 | 1.352500 | 1.372500 | 22517600 |
| ... | ... | ... | ... | ... | ... |
| 2023-01-25 | 20.230000 | 20.840000 | 19.530001 | 20.590000 | 3515800 |
| 2023-01-26 | 20.010000 | 21.170000 | 19.379999 | 20.610001 | 3520000 |
| 2023-01-27 | 22.820000 | 23.309999 | 19.410000 | 19.799999 | 11897200 |
| 2023-01-30 | 21.250000 | 23.480000 | 21.129999 | 22.500000 | 4950600 |
| 2023-01-31 | 21.870001 | 22.049999 | 21.299999 | 21.410000 | 3128400 |
776 rows × 5 columns
We can also use the .describe() method to get a sense of the data over the period.
# Summary statistics (count, mean, std, min, quartiles, max) for every column
gme_summary = gme.describe()
gme_summary
| Close | High | Low | Open | Volume | |
|---|---|---|---|---|---|
| count | 776.000000 | 776.000000 | 776.000000 | 776.000000 | 7.760000e+02 |
| mean | 24.452030 | 25.939124 | 23.237738 | 24.562152 | 3.110780e+07 |
| std | 18.981395 | 20.480236 | 17.971932 | 19.209501 | 7.076838e+07 |
| min | 0.700000 | 0.735000 | 0.642500 | 0.712500 | 1.122700e+06 |
| 25% | 2.634375 | 2.787500 | 2.506875 | 2.640625 | 6.793800e+06 |
| 50% | 26.115000 | 27.490000 | 25.133750 | 26.085000 | 1.257080e+07 |
| 75% | 39.628749 | 40.998749 | 37.886249 | 39.750000 | 2.489090e+07 |
| max | 86.877502 | 120.750000 | 72.877502 | 94.927498 | 7.886316e+08 |
Visualizing the data¶
Next, we can use a Plotly line plot to examine the data over time.
# Line chart of the daily close; the DataFrame's date index supplies the x axis
fig = px.line(
    gme,
    x=gme.index,
    y="Close",
    title="GameStop Closing Price (daily)",
    template="dc",
)

# Render the chart
fig.show()
Let's add an annotation to make it clear when key events happened. We will cover three key events in the timeline:
- The date that the new board was announced, and r/wallstreetbets began hyping the stock.
- The date when the trading app RobinHood restricted trading for GameStop (and some other stocks).
- A late-February surge fueled by more activity on r/wallstreetbets.
Note: due to a bug with Plotly, we need to parse the dates with strptime() and convert them to milliseconds since the epoch to enable our annotations.
from datetime import timezone


def _epoch_ms(date_str):
    """Return midnight UTC of a YYYY-MM-DD date as milliseconds since the epoch."""
    parsed = datetime.strptime(date_str, "%Y-%m-%d")
    # A naive datetime's .timestamp() is interpreted in the machine's local
    # time zone, which would shift each line by the local UTC offset.
    # Pinning the value to UTC makes the position machine-independent.
    return parsed.replace(tzinfo=timezone.utc).timestamp() * 1000


# Create a filtered DataFrame for early 2021 (January through March)
gme_2021 = gme.loc["2021-01":"2021-03"]

# Create a Plotly figure
fig = px.line(
    gme_2021,
    x=gme_2021.index,
    y="Close",
    template="dc",
    title="GameStop Closing Price (daily)",
)

# Define three key events (as epoch milliseconds, which add_vline needs here
# in order to place the annotation text)
short = _epoch_ms("2021-01-11")
robin = _epoch_ms("2021-01-28")
late_feb = _epoch_ms("2021-02-23")

# Add these as thin, labelled vertical lines
fig.add_vline(x=short, line_width=0.5, annotation_text="r/wallstreetbets")
fig.add_vline(x=robin, line_width=0.5, annotation_text="Robinhood")
fig.add_vline(x=late_feb, line_width=0.5, annotation_text="Memes")

# Show the plot
fig.show()
Alternatively, we can use a candlestick chart to get a good sense of price action.
# Map each candlestick argument (open/high/low/close) to its price column
ohlc = {column.lower(): gme[column] for column in ("Open", "High", "Low", "Close")}

# Candlestick trace over the full date range
candlestick = go.Candlestick(x=gme.index, **ohlc)

# Wrap the trace in a figure and apply the shared template
fig = go.Figure(data=candlestick)
fig.update_layout(template="dc", title="GME Prices (Candlestick)")

# Render the chart
fig.show()
Rolling averages¶
The data is quite noisy. We can also use a window function to calculate the rolling mean over a certain number of periods. In our case, we'll use the past 28 days of data.
This also smooths out the line, and still gives day-by-day performance.
# Mean of every column over a trailing 28-calendar-day, time-based window
gme_rolling = gme.rolling(window="28D").mean()

# Line chart of the smoothed closing price
fig = px.line(
    gme_rolling,
    x=gme_rolling.index,
    y="Close",
    title="GameStop Closing Price (Rolling 28 Day mean) ",
    template="dc",
)

# Render the chart
fig.show()
Comparing to a benchmark¶
It would be nice to be able to compare the performance of GameStop against a stock market index such as the S&P 500 (an index tracking the performance of 500 large US companies).
# Download the S&P 500 index (^GSPC) over the same date range as GameStop.
# auto_adjust is pinned explicitly because its default changed between
# yfinance releases; multi_level_index=False keeps flat column names.
sp = yf.download(
    "^GSPC",
    start=start,
    end=stop,
    auto_adjust=True,
    multi_level_index=False,
)

# Give the two close columns distinct names so they can sit side by side.
# NOTE: this rebinds `gme` with "Close" renamed to "GameStop Close", so the
# earlier cells that read gme["Close"] must be run before this one.
sp = sp.rename(columns={"Close": "S&P Close"})
gme = gme.rename(columns={"Close": "GameStop Close"})

# Concatenate the two close series column-wise, aligned on the date index
all_data = pd.concat([gme["GameStop Close"], sp["S&P Close"]], axis=1)

# Preview the data
all_data
[*********************100%***********************] 1 of 1 completed
| GameStop Close | S&P Close | |
|---|---|---|
| Date | ||
| 2020-01-02 | 1.577500 | 3257.850098 |
| 2020-01-03 | 1.470000 | 3234.850098 |
| 2020-01-06 | 1.462500 | 3246.280029 |
| 2020-01-07 | 1.380000 | 3237.179932 |
| 2020-01-08 | 1.430000 | 3253.050049 |
| ... | ... | ... |
| 2023-01-25 | 20.230000 | 4016.219971 |
| 2023-01-26 | 20.010000 | 4060.429932 |
| 2023-01-27 | 22.820000 | 4070.560059 |
| 2023-01-30 | 21.250000 | 4017.770020 |
| 2023-01-31 | 21.870001 | 4076.600098 |
776 rows × 2 columns
As you can see, the S&P 500 prices are on a much different scale than GameStop's. Let's normalize the prices so they start at 100. To do this, we will:
- Divide all prices by the first price in the series.
- Multiply them by 100.
All prices will then be relative to the starting point. This way, we can compare how large the change is between the two time series, regardless of their starting values.
# First row holds each series' starting price
first_prices = all_data.iloc[0]

# Rebase every column so that its first value equals 100
normalized_prices = (all_data / first_prices) * 100

# Preview the rebased prices
normalized_prices
| GameStop Close | S&P Close | |
|---|---|---|
| Date | ||
| 2020-01-02 | 100.000000 | 100.000000 |
| 2020-01-03 | 93.185423 | 99.294013 |
| 2020-01-06 | 92.709983 | 99.644856 |
| 2020-01-07 | 87.480191 | 99.365527 |
| 2020-01-08 | 90.649760 | 99.852662 |
| ... | ... | ... |
| 2023-01-25 | 1282.408857 | 123.278231 |
| 2023-01-26 | 1268.462784 | 124.635260 |
| 2023-01-27 | 1446.592704 | 124.946205 |
| 2023-01-30 | 1347.068158 | 123.325810 |
| 2023-01-31 | 1386.370906 | 125.131604 |
776 rows × 2 columns
We will .melt() the DataFrame to make it easier to plot the two time series.
# Reshape from wide (one column per series) to long (one row per date and
# series) so the series name can drive the line colour when plotting.
normalized_melt = (
    normalized_prices
    .reset_index()
    .melt(
        id_vars="Date",
        var_name="Ticker",
        value_name="Closing Price",
    )
)

# Preview the long-format data
normalized_melt
| Date | Ticker | Closing Price | |
|---|---|---|---|
| 0 | 2020-01-02 | GameStop Close | 100.000000 |
| 1 | 2020-01-03 | GameStop Close | 93.185423 |
| 2 | 2020-01-06 | GameStop Close | 92.709983 |
| 3 | 2020-01-07 | GameStop Close | 87.480191 |
| 4 | 2020-01-08 | GameStop Close | 90.649760 |
| ... | ... | ... | ... |
| 1547 | 2023-01-25 | S&P Close | 123.278231 |
| 1548 | 2023-01-26 | S&P Close | 124.635260 |
| 1549 | 2023-01-27 | S&P Close | 124.946205 |
| 1550 | 2023-01-30 | S&P Close | 123.325810 |
| 1551 | 2023-01-31 | S&P Close | 125.131604 |
1552 rows × 3 columns
# One line per ticker, coloured by the "Ticker" column of the long-format data
fig = px.line(
    normalized_melt,
    x="Date",
    y="Closing Price",
    color="Ticker",
    title="GameStop vs. S&P 500 Closing Price(normalized)",
    template="dc",
)

# Render the chart
fig.show()
Plotting the Autocorrelation Function¶
Autocorrelation is the correlation of a time series with a lagged version of itself. Plotting it can give you an idea of how lagged periods correlate to the present period.
First, let's get some recent data from when GameStop seems to have stabilized.
# Download recent GameStop data.
# auto_adjust is pinned explicitly because its default changed between
# yfinance releases; multi_level_index=False keeps flat column names.
gme_recent = yf.download(
    "GME",
    start="2023-01-01",
    end="2023-02-21",
    auto_adjust=True,
    multi_level_index=False,
)

# Preview the data
gme_recent
[*********************100%***********************] 1 of 1 completed
| Close | High | Low | Open | Volume | |
|---|---|---|---|---|---|
| Date | |||||
| 2023-01-03 | 17.200001 | 19.260000 | 17.090000 | 18.639999 | 5135200 |
| 2023-01-04 | 17.320000 | 17.930000 | 16.900000 | 17.250000 | 3939300 |
| 2023-01-05 | 16.219999 | 17.260000 | 15.890000 | 17.059999 | 6066200 |
| 2023-01-06 | 16.459999 | 16.570000 | 15.410000 | 16.000000 | 4823400 |
| 2023-01-09 | 16.379999 | 17.129999 | 16.360001 | 16.650000 | 3522600 |
| 2023-01-10 | 17.770000 | 18.090000 | 16.250000 | 16.299999 | 4402800 |
| 2023-01-11 | 19.040001 | 20.049999 | 17.860001 | 18.190001 | 8405800 |
| 2023-01-12 | 20.629999 | 20.629999 | 18.340000 | 19.040001 | 5877300 |
| 2023-01-13 | 20.490000 | 21.110001 | 19.799999 | 19.879999 | 5494400 |
| 2023-01-17 | 21.799999 | 21.940001 | 20.370001 | 20.490000 | 5407900 |
| 2023-01-18 | 20.790001 | 22.150000 | 20.500000 | 22.010000 | 5632200 |
| 2023-01-19 | 19.040001 | 20.450001 | 18.820000 | 20.440001 | 4519200 |
| 2023-01-20 | 19.610001 | 20.070000 | 18.900000 | 19.049999 | 3564300 |
| 2023-01-23 | 21.660000 | 22.490000 | 19.370001 | 19.500000 | 7181500 |
| 2023-01-24 | 21.400000 | 22.090000 | 21.000000 | 21.299999 | 2381600 |
| 2023-01-25 | 20.230000 | 20.840000 | 19.530001 | 20.590000 | 3515800 |
| 2023-01-26 | 20.010000 | 21.170000 | 19.379999 | 20.610001 | 3520000 |
| 2023-01-27 | 22.820000 | 23.309999 | 19.410000 | 19.799999 | 11897200 |
| 2023-01-30 | 21.250000 | 23.480000 | 21.129999 | 22.500000 | 4950600 |
| 2023-01-31 | 21.870001 | 22.049999 | 21.299999 | 21.410000 | 3128400 |
| 2023-02-01 | 21.830000 | 22.120001 | 20.549999 | 21.490000 | 4517200 |
| 2023-02-02 | 22.700001 | 24.400000 | 22.219999 | 22.440001 | 7655700 |
| 2023-02-03 | 22.250000 | 23.930000 | 21.799999 | 22.010000 | 4935900 |
| 2023-02-06 | 23.860001 | 24.879999 | 21.770000 | 21.879999 | 8708400 |
| 2023-02-07 | 21.219999 | 23.049999 | 20.500000 | 23.000000 | 9093700 |
| 2023-02-08 | 21.100000 | 21.549999 | 20.610001 | 21.430000 | 2846400 |
| 2023-02-09 | 19.670000 | 21.650000 | 19.670000 | 21.250000 | 4497400 |
| 2023-02-10 | 19.270000 | 20.040001 | 19.049999 | 19.559999 | 2902700 |
| 2023-02-13 | 19.690001 | 19.940001 | 19.010000 | 19.299999 | 2299300 |
| 2023-02-14 | 19.870001 | 20.010000 | 19.100000 | 19.309999 | 2746300 |
| 2023-02-15 | 21.799999 | 21.799999 | 19.590000 | 19.799999 | 7137800 |
| 2023-02-16 | 21.600000 | 22.420000 | 20.850000 | 21.250000 | 3879000 |
| 2023-02-17 | 21.990000 | 22.070000 | 21.150000 | 21.270000 | 2851000 |
We will use the acf() function to generate the autocorrelation function for the most recent GameStop data.
# Autocorrelation function from statsmodels
from statsmodels.tsa.stattools import acf

# Autocorrelation of the recent closing prices, computed with nlags=10
acf_array = acf(gme_recent["Close"], nlags=10)

# Scatter the values against their position in the array
fig = px.scatter(acf_array, template="dc")

# Hide the legend and pin the x axis to 0..10
fig.update_layout(showlegend=False)
fig.update_xaxes(range=[0, 10])

# Render the chart
fig.show()
First we need to fix the index before making forecasts.
# Convert the date index to business-day ("B") periods
business_days = pd.DatetimeIndex(gme_recent.index).to_period("B")
gme_recent.index = business_days

# Build an unbroken range of periods from the first to the last observation
# and reindex onto it; periods absent from the data become NaN rows.
new_index = pd.period_range(start=business_days[0], end=business_days[-1])
gme_recent = gme_recent.reindex(new_index)
Making Simple Forecasts¶
Finally, we are going to fit a model to the recent GameStop data and make a forecast (the forecasting model further below is fit on data up until the 9th of February). We are going to use an AR(1) model.
$\large R_t = \mu + \phi R_{t-1} + \epsilon_t$
An AR(1) model calculates the current value as a mean plus a fraction ( $ \phi $ ) of yesterday's value and some noise.
- If $ \phi $ is 0 then the process is just noise.
- If $ \phi $ is 1 then the process is a random walk.
# ARIMA model class from statsmodels
from statsmodels.tsa.arima.model import ARIMA

# order=(1, 0, 0): one autoregressive lag, no differencing, no moving-average
# terms, i.e. an AR(1) model of the closing price
mod = ARIMA(endog=gme_recent["Close"], order=(1, 0, 0))
res = mod.fit()

# Display the fitted model's summary table
res.summary()
| Dep. Variable: | Close | No. Observations: | 34 |
|---|---|---|---|
| Model: | ARIMA(1, 0, 0) | Log Likelihood | -52.228 |
| Date: | Sun, 09 Mar 2025 | AIC | 110.455 |
| Time: | 08:51:34 | BIC | 115.035 |
| Sample: | 01-03-2023 | HQIC | 112.017 |
| - 02-17-2023 | |||
| Covariance Type: | opg |
| coef | std err | z | P>|z| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 20.1466 | 0.882 | 22.838 | 0.000 | 18.418 | 21.876 |
| ar.L1 | 0.8186 | 0.107 | 7.668 | 0.000 | 0.609 | 1.028 |
| sigma2 | 1.3209 | 0.362 | 3.649 | 0.000 | 0.611 | 2.030 |
| Ljung-Box (L1) (Q): | 0.20 | Jarque-Bera (JB): | 0.54 |
|---|---|---|---|
| Prob(Q): | 0.65 | Prob(JB): | 0.76 |
| Heteroskedasticity (H): | 1.27 | Skew: | 0.26 |
| Prob(H) (two-sided): | 0.70 | Kurtosis: | 2.66 |
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
Comparing different models¶
We ran the model with one lagged parameter. But how does our model compare to one with a different order? We can use the Akaike Information Criterion (AIC) and the Bayesian Information Criterion (BIC) to compare goodness of fit for different orders. Here we compare the BIC, where a lower value is better.
# BIC of each candidate model, in order of increasing AR order
bic = []

# Fit AR(0) through AR(9) on the recent closing prices and record each BIC
for ar_order in range(10):
    mod = ARIMA(gme_recent["Close"], order=(ar_order, 0, 0))
    res = mod.fit()
    bic.append(res.bic)

# Plot BIC against list position (= AR order); override the template's
# "tozero" range mode so the y axis is not forced to start at zero
fig = px.line(bic, template="dc")
fig.update_yaxes(rangemode="normal")
fig.show()
It looks like the lowest BIC occurs at lag 1. We can now use the get_forecast method to make estimates out of sample (i.e., past the range of our data).
# Keep observations through 2023-02-09, holding out the final week
gme_recent_trunc = gme_recent[:"2023-02-09"]

# Fit an AR(1) model on the truncated closing prices
mod = ARIMA(gme_recent_trunc["Close"], order=(1, 0, 0))
res = mod.fit()

# Forecast 7 periods ahead and tabulate the mean, standard error and
# confidence bounds as a DataFrame
forecast = res.get_forecast(steps=7)
preds = forecast.summary_frame()

# View the forecasts
preds
| Close | mean | mean_se | mean_ci_lower | mean_ci_upper |
|---|---|---|---|---|
| 2023-02-10 | 19.691563 | 1.204579 | 17.330631 | 22.052494 |
| 2023-02-13 | 19.709093 | 1.552465 | 16.666319 | 22.751868 |
| 2023-02-14 | 19.723346 | 1.744748 | 16.303702 | 23.142991 |
| 2023-02-15 | 19.734935 | 1.860975 | 16.087490 | 23.382379 |
| 2023-02-16 | 19.744356 | 1.933972 | 15.953840 | 23.534872 |
| 2023-02-17 | 19.752016 | 1.980748 | 15.869822 | 23.634210 |
| 2023-02-20 | 19.758244 | 2.011070 | 15.816620 | 23.699867 |
Plot the forecast¶
Finally, we can create a Plotly chart to visualize the forecasts with the confidence intervals.
# Period indexes are converted to timestamps for plotting
actual_dates = gme_recent.index.to_timestamp()
forecast_dates = preds.index.to_timestamp()

# Zero-width line style: the interval bounds are drawn only as a filled band
ci_line = dict(color="lightblue", width=0)

# Build the figure trace by trace: actual prices, forecast mean, then the
# confidence-interval bounds
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        name="True value",
        x=actual_dates,
        y=gme_recent["Close"],
        mode="lines",
    )
)
fig.add_trace(
    go.Scatter(
        name="Predicted value",
        x=forecast_dates,
        y=preds["mean"],
        mode="lines",
    )
)
fig.add_trace(
    go.Scatter(
        name="Upper",
        x=forecast_dates,
        y=preds["mean_ci_upper"],
        mode="lines",
        line=ci_line,
    )
)
fig.add_trace(
    go.Scatter(
        name="Lower",
        x=forecast_dates,
        y=preds["mean_ci_lower"],
        mode="lines",
        line=ci_line,
        # Fill up to the previous trace ("Upper") to shade the interval
        fill="tonexty",
    )
)

# Label the chart, hide the legend, apply the shared template and render
fig.update_layout(
    template="dc",
    title="GameStop Price and Forecast over Time",
    yaxis_title="Price",
    showlegend=False,
)
fig.show()